$\textbf{LOB}$, $\text{using only the } \textit{Limit Order Book}$
$\textbf{LIQ}$, $\text{using only the } \textit{Liquidity Measures}$
$\textbf{LOB+LIQ}$, $\text{using the } \textit{Limit Order Book} \textbf{ and } \textit{Liquidity Measures} \text{ together}$
from IPython.display import HTML


def hide_code():
    """Return an HTML widget with a button that toggles the visibility of all
    notebook input (code) cells via jQuery; cells start hidden on page load."""
    return HTML('''<script>code_show=true; function code_toggle() {if (code_show){$('div.input').hide();} else {$('div.input').show();}code_show = !code_show} $( document ).ready(code_toggle);</script><form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')


hide_code()
The training and validation performances of each model are shown below with $\textbf{Mean Squared Error (MSE)}$ and $\textbf{Mean Absolute Percentage Error (MAPE)}$ as the chosen performance criteria. Results obtained with linear regression $\textbf{(LinReg)}$ on prices are also added for comparison. Each value is rounded with a relative error less than 1%.
import pandas as pd
import numpy as np
from utils.df_handler import get_significant_figures

# Collect mean train/val errors for every (error type, data set, model)
# combination. The append order must match `indices` below:
# error type -> data type -> (NN, LinReg).
errors = []
for error_type in ['se', 'ape']:
    for data_type in ['LOB', 'LIQ', 'LOB+LIQ']:
        # `allow_pickle` is required because each .npy file stores a dict
        # of error arrays. Avoid shadowing the builtin `input`.
        with open(f'./eval/models/errors/{data_type}.npy', 'rb') as npy_file:
            nn_errors = np.load(npy_file, allow_pickle='TRUE').item()
        with open(f'./eval/models/errors/LinReg_{data_type}.npy', 'rb') as npy_file:
            linreg_errors = np.load(npy_file, allow_pickle='TRUE').item()
        for model_errors in [nn_errors, linreg_errors]:
            # Interleave per-target means as [train, val, train, val, ...]
            # so rows line up with the (target, split) MultiIndex columns.
            errors.append(np.ravel(
                [[tr, va] for tr, va in zip(np.mean(model_errors[error_type + '_train'], 0),
                                            np.mean(model_errors[error_type + '_val'], 0))]).tolist())

indices = pd.MultiIndex.from_tuples([(i, j, k)
                                     for i in ['MSE', 'MAPE (%)']
                                     for j in ['LOB', 'LIQ', 'LOB+LIQ']
                                     for k in ['NN', 'LinReg']])
columns = pd.MultiIndex.from_tuples([(i, k)
                                     for i in ['mid price', 'bid price expectation',
                                               'ask price expectation',
                                               'bid price variance', 'ask price variance']
                                     for k in ['train', 'val']])
errors_df = pd.DataFrame(errors, index=indices, columns=columns)
# Display each value rounded to 3 significant figures (relative error < 1%).
get_significant_figures(errors_df, 3, 0.01)
Below we have plots illustrating the validation performances of the models.
from utils.plotter import plotter

# Distinct tick labels per model column: pad 'NN'/'LinReg' with spaces so each
# of the three data-set groups gets unique (but identically rendered) labels.
model_names = [' ' * i + k + ' ' * i for i in range(3) for k in ['NN', 'LinReg']]
errors_df_rounded = get_significant_figures(errors_df, 3, 0.01)


def _axis_args(observable, legend_loc):
    """Build the plotter argument list (one entry per attr below) for the
    validation panel of one predicted quantity.

    observable: lowercase column key in `errors_df` (e.g. 'mid price').
    legend_loc: matplotlib legend location for this panel.
    """
    mse = errors_df[(observable, 'val')]['MSE'].values
    mape = errors_df[(observable, 'val')]['MAPE (%)'].values
    mse_rounded = errors_df_rounded[(observable, 'val')]['MSE'].values.tolist()
    mape_rounded = errors_df_rounded[(observable, 'val')]['MAPE (%)'].values.tolist()
    return [
        # MSE markers on the primary axis.
        [model_names, mse, 'X', dict(color='blue', label='MSE', markersize=15, alpha=0.5)]
        # Dashed guide lines dropping from each MSE point toward the x-axis.
        , [mse, [*range(len(model_names))], [-1] * len(model_names), 'blue', 'dashed', dict(linewidth=0.4)]
        , [-0.5, len(model_names) - 0.5], None  # xlim, ylim (auto)
        # Twin y-axis carrying the MAPE markers.
        , {'plot': [model_names, mape, 'o'],
           'plot_specs': dict(marker='o', color='limegreen', label='MAPE', markersize=20, mfc='None'),
           'hline_specs': dict(linewidth=0.4),
           'ylabel': dict(ylabel='MAPE (%)', fontsize=15, rotation=-90, labelpad=18),
           'tick_params': dict(axis='both', labelsize=12)}
        , [dict(axis='both', labelsize=12)]
        , ['', dict(fontsize=15)], ['MSE', dict(fontsize=15)], [observable.title(), dict(fontsize=20)]
        # Numeric table below the axis with the rounded values.
        , [dict(cellText=[mse_rounded, mape_rounded], rowLabels=['MSE', 'MAPE (%)'],
                cellLoc='center', bbox=[0, -0.22, 1, 0.1])]
        # Header row naming the training data set of each model group.
        , [dict(cellText=[['Trained with ' + i for i in ('LOB', 'LIQ', 'LOB+LIQ')]],
                cellLoc='center', colLabels=[''] * 3, edges='vertical', bbox=[0, -0.12, 1, 0.12]),
           {'text_props': dict(set_color=['red'])}]
        , [dict(ncol=1, shadow=1, labelspacing=0.3, fontsize=15, loc=legend_loc)]
        , [dict(b=True, axis='x', alpha=0.5)]
    ]


# One panel per predicted quantity; the variance panels need the legend at the
# bottom so it does not cover the markers.
args = [_axis_args(obs, loc) for obs, loc in [
    ('mid price', 'upper left'),
    ('bid price expectation', 'upper left'),
    ('ask price expectation', 'upper left'),
    ('bid price variance', 'lower left'),
    ('ask price variance', 'lower left'),
]]
attrs = ['plot'
         , 'hlines'
         , 'set_xlim', 'set_ylim'
         , 'make_twiny'
         , 'tick_params'
         , 'set_xlabel', 'set_ylabel', 'set_title'
         , 'make_table'
         , 'make_table'
         , 'legend', 'grid']
plotter(args, attrs, fig_title='Validation Performance of Models\n Stock: GARAN\n Year: 2017',
        dpi=600, ncols=1, xpad=-10, ypad=5, hspace=0.32, suptitle_y=0.95)
We are showing two types of errors per plot:
Squared Error shown above the x-axis,
Relative Error shown below the x-axis
Below is a table showing the type of histogram shown in the plot in the corresponding quadrant for both Neural Network and Linear Regression models:
| Squared Training Errors | Squared Validation Errors |
|---|---|
| Relative Training Errors | Relative Validation Errors |
where $y$: Ground Truth and $\hat{y}$: Prediction.
from utils.plotter import plotter
import numpy as np
def get_xlim(errors_h, errors, tol):
    """Return the bin edge of the right-most histogram bin whose count
    exceeds ``tol`` times the peak count, or None if no bin qualifies."""
    threshold = tol * errors_h.max()
    # Offset preserves the original index arithmetic even if the two
    # sequences differ in length (they are equal at every call site).
    offset = len(errors) - len(errors_h)
    for pos in range(len(errors_h) - 1, -1, -1):
        if errors_h[pos] > threshold:
            return errors[offset + pos]
    return None
def _load_errors(data_type):
    """Load the stored error dicts for the NN and LinReg models trained on
    `data_type`; returns [nn_errors, linreg_errors]."""
    loaded = []
    for prefix in ('', 'LinReg_'):
        # Avoid shadowing the builtin `input`; allow_pickle because the
        # files store dicts of arrays.
        with open(f'./eval/models/errors/{prefix}{data_type}.npy', 'rb') as npy_file:
            loaded.append(np.load(npy_file, allow_pickle='TRUE').item())
    return loaded


def _hist(sample, bins):
    """Histogram `sample` normalized by its mean; return (counts in %,
    left bin edges scaled by 100, i.e. % of the mean)."""
    sample = sample / sample.mean()
    counts, edges = np.histogram(sample, bins)
    return counts / counts.sum() * 100, edges[:-1] * 100


def get_axis_args(data_type, observable, bins, xlim_tol, **kwargs):
    """Build the plotter argument list for one error-distribution panel.

    data_type:  'LOB', 'LIQ' or 'LOB+LIQ' (selects the stored error files).
    observable: title-case name of the predicted quantity.
    bins:       np.histogram bin specification (e.g. 'fd').
    xlim_tol:   relative count threshold used to pick a common x-limit.
    kwargs:     optional 'ax_suptitle' text placed above the panel.
    """
    obs_dict = {'Mid Price': 0, 'Bid Price Expectation': 1, 'Ask Price Expectation': 2,
                'Bid Price Variance': 3, 'Ask Price Variance': 4}
    n = obs_dict[observable]
    data_type_dict = {'LOB': 'a) ', 'LIQ': 'b) ', 'LOB+LIQ': 'c) '}
    nn_errors, linreg_errors = _load_errors(data_type)

    # Percentage histograms of mean-normalized errors for every
    # (error type, split, model) combination.
    train_h, train = _hist(nn_errors['se_train'][:, n], bins)
    train_h_reg, train_reg = _hist(linreg_errors['se_train'][:, n], bins)
    val_h, val = _hist(nn_errors['se_val'][:, n], bins)
    val_h_reg, val_reg = _hist(linreg_errors['se_val'][:, n], bins)
    train_h_ape, train_ape = _hist(nn_errors['ape_train'][:, n], bins)
    train_h_reg_ape, train_reg_ape = _hist(linreg_errors['ape_train'][:, n], bins)
    val_h_ape, val_ape = _hist(nn_errors['ape_val'][:, n], bins)
    val_h_reg_ape, val_reg_ape = _hist(linreg_errors['ape_val'][:, n], bins)

    # Common symmetric x-limit: the largest bin edge (over all eight
    # histograms) whose count still exceeds xlim_tol times that
    # histogram's peak.
    hists = [(train_h, train), (train_h_reg, train_reg), (val_h, val), (val_h_reg, val_reg),
             (train_h_ape, train_ape), (train_h_reg_ape, train_reg_ape),
             (val_h_ape, val_ape), (val_h_reg_ape, val_reg_ape)]
    xlim = max(get_xlim(h, e, xlim_tol) for h, e in hists)

    def _bars(edges, counts, sign, color, alpha, flip):
        """One `bar` argument list: `sign=-1` mirrors the bars to the
        training (left) side; `flip=-1` draws them below the x-axis
        (relative errors)."""
        return [sign * edges, flip * counts, sign * np.diff(edges)[0],
                dict(align='edge', color=color, alpha=alpha)]

    # Bar order must match `attrs` at the call site:
    # NN train, NN val, LinReg train, LinReg val — first SE, then APE.
    combos = [(train, train_h, train_ape, train_h_ape, -1, 'blue'),
              (val, val_h, val_ape, val_h_ape, 1, 'red'),
              (train_reg, train_h_reg, train_reg_ape, train_h_reg_ape, -1, 'green'),
              (val_reg, val_h_reg, val_reg_ape, val_h_reg_ape, 1, 'gold')]
    se_bars = [_bars(e, c, s, col, 0.8, 1) for e, c, _, _, s, col in combos]
    ape_bars = [_bars(e, c, s, col, 0.4, -1) for _, _, e, c, s, col in combos]
    # Empty line plots exist only to give the legend colored handles.
    legend_handles = [[[], '-', dict(color=col, linewidth=2, alpha=0.5, fillstyle=fs)]
                      for col, fs in [('blue', 'left'), ('red', 'right'),
                                      ('green', 'left'), ('gold', 'right')]]

    axis_args = [[
        *se_bars, *ape_bars, *legend_handles
        , [-xlim, xlim]
        , [dict(color='black', lw=0.5)], [dict(color='black', lw=0.5)]
        , dict(x='positive', y='positive')
        , ['Errors Relative to the Mean (%)', dict(fontsize=15)]
        , ['Normalized Error Counts (%)', dict(fontsize=15)]
        , [data_type_dict[data_type] + data_type, dict(fontsize=20)]
        # Quadrant annotations (axes-fraction coordinates).
        , [[0.05, 0, 'Relative Errors'], dict(color='saddlebrown', alpha=0.6, fontsize=20, horizontalalignment='left', verticalalignment='bottom')]
        , [[0.05, 0.95, 'Squared Errors'], dict(color='saddlebrown', fontsize=20, horizontalalignment='left', verticalalignment='top')]
        , [[0.1, -0.22, 'Training Side'], dict(color='black', fontsize=20, horizontalalignment='left', verticalalignment='top')]
        , [[0.9, -0.22, 'Validation Side'], dict(color='black', fontsize=20, horizontalalignment='right', verticalalignment='top')]
        # Optional panel super-title.
        , [[0.5, 1.2, kwargs.get('ax_suptitle', '')], dict(color='black', fontsize=25, horizontalalignment='center', verticalalignment='bottom')]
        , [dict(line_order=[[0, 1], [2, 3]], labels=['Neural Network', 'Linear Regression'], ncol=1, shadow=1, labelspacing=0.8, fontsize=12, loc='upper right')]
        , [dict(b=True, axis='both', alpha=0.5)]
    ]]
    return axis_args
# Assemble one panel per (observable, data set) pair, laid out 3 per row.
args = []
observables = ['Mid Price', 'Bid Price Expectation', 'Ask Price Expectation',
               'Bid Price Variance', 'Ask Price Variance']
for obs in observables:
    for col, data_type in enumerate(['LOB', 'LIQ', 'LOB+LIQ']):
        # Show the observable name only above the middle (LIQ) panel of
        # each row, so it reads as a row-wide title.
        suptitle = obs if col == 1 else ''
        args += get_axis_args(data_type, obs, 'fd', 0.1, ax_suptitle=suptitle)

attrs = (['bar'] * 8                                   # mirrored SE/APE histograms
         + ['plot'] * 4                                # invisible lines, legend handles only
         + ['set_xlim']
         + ['axvline', 'axhline']
         + ['ticks']
         + ['set_xlabel', 'set_ylabel', 'set_title']
         + ['text'] * 4                                # quadrant annotations
         + ['text']                                    # row super-title
         + ['legend', 'grid'])

plotter(args, attrs, fig_title='Error Distributions of Models\n Stock: GARAN\n Year: 2017',
        dpi=600, ncols=3, xpad=10, ypad=6, hspace=0.65, suptitle_y=0.95, suptitle_x=0.51)